################################################
# Cleaning regional variables and ERM data
################################################
library(tidyverse)
library(regions)

# To run this code, you need to acquire raw regional data at Eurostat and the ERM micro dataset.

setwd() # set working directory to the folder with the replication materials

# 1. total population (in thousands)
pop <- read.csv("population.csv")
pop <- pop %>%
  select(GEO, Value) %>%
  mutate(Value = ifelse(Value==":", NA, Value)) %>%
  mutate(Value = gsub(",", "", Value)) %>%
  mutate(Value = as.numeric(Value)/1000) %>%
  rename(pop = Value)

region_vars <- pop

# 2. employment
emp <- read.csv("employment.csv")
emp <- emp %>%
  select(GEO, Value) %>%
  mutate(Value = ifelse(Value==":", NA, Value)) %>%
  mutate(Value = gsub(",", "", Value)) %>%
  mutate(Value = as.numeric(Value)) %>%
  rename(employment = Value)

region_vars <- region_vars %>% left_join(emp, by = "GEO")

# 3. unemployment rate (2015-2017)
unemp <- read.csv("unemployment_rate.csv")
unemp <- unemp %>%
  select(GEO, TIME, Value) %>%
  mutate(Value = ifelse(Value==":", NA, Value)) %>%
  mutate(Value = gsub(",", "", Value)) %>%
  mutate(Value = as.numeric(Value)) %>%
  rename(unemp_rate = Value)

# Calculate average over 2015-2017
unemp <- unemp %>%
  group_by(GEO) %>%
  summarise(unemp_rate = mean(unemp_rate, na.rm = T)) %>%
  mutate(unemp_rate = ifelse(is.nan(unemp_rate)==1,NA,unemp_rate)) %>%
  ungroup() 

region_vars <- region_vars %>% left_join(unemp, by = "GEO")

# 4. college educated share in 2014
college <- read.csv("college_educated.csv")
college <- college %>%
  select(GEO, Value) %>%
  mutate(Value = ifelse(Value==":", NA, Value)) %>%
  mutate(Value = gsub(",", "", Value)) %>%
  mutate(Value = as.numeric(Value)) %>%
  rename(college_pop = Value)

region_vars <- region_vars %>% left_join(college, by = "GEO")

# 5. male share in 2014
male <- read.csv("male.csv")
male <- male %>%
  select(GEO, Value) %>%
  mutate(Value = ifelse(Value==":", NA, Value)) %>%
  mutate(Value = gsub(",", "", Value)) %>%
  mutate(Value = as.numeric(Value)) %>%
  rename(male_pop = Value)

region_vars <- region_vars %>% left_join(male, by = "GEO")

# 6. non-citizens in 2014
foreign_born <- read.csv("foreign_born.csv")
foreign_born <- foreign_born %>%
  filter(TIME == 2014) %>%
  filter(CITIZEN == "Foreign country") %>%
  select(GEO, Value) %>%
  mutate(Value = ifelse(Value==":", NA, Value)) %>%
  mutate(Value = gsub(",", "", Value)) %>%
  mutate(Value = as.numeric(Value)) %>%
  rename(foreign_pop = Value)

region_vars <- region_vars %>% left_join(foreign_born, by = "GEO")

# Calculate share of college educated, male, imm, manufacturing
region_vars <- region_vars %>% 
  mutate(college_share = college_pop/pop,
         male_share = male_pop/pop,
         imm_share = foreign_pop/pop)

# Remove observations for the EU
region_vars <- region_vars %>% filter(!GEO %in% c("EU27_2020", "EU28", "EU27_2007"))

# Append NUTS 2013 code to match ESS data
region_vars <- region_vars %>%
  recode_nuts("GEO", nuts_year = 2013 ) %>%
  select(-typology, -typology_change)

write.csv(region_vars, "regional_variables.csv", row.names = FALSE)

####################################
# Compute job changes
####################################
erm <- read.csv("erm.csv", stringsAsFactors = FALSE)

# remove World and EU
erm <- erm %>% filter(!nutscode %in% c("WO", "EU"))

# Date
erm$date <- as.Date(erm$announcementdate, format = "%d/%m/%Y")
erm$year <- substr(erm$date, 1, 4)

# Filter cases between 2015-2017
erm <- erm %>% filter(year <= 2017, year>=2015,
                      country %in% c("Austria", "Belgium", "Switzerland",
                                     "Germany", "Spain", "Finland", "France",
                                     "United Kingdom", "Ireland", "Italy", 
                                     "Netherlands", "Norway", "Portugal", "Sweden"))

# Create Nuts 2 code
erm <- erm %>%
  mutate(geo = substr(nutscode,1,4)) %>% 
  filter(nchar(geo) == 4)

# NUTS 1 for Geramany, Italy, UK to match ESS data
erm$geo[grepl("DE|IT|UK",erm$geo)] <- substr(erm$geo[grepl("DE|IT|UK",erm$geo)],1,3)

# Correct typo
erm$geo[erm$geo=="Dec-"] <- "DEC"

## 1-1) Manufacturing Jobs
erm_manu <- erm %>%
  filter(sectorname == "Manufacturing") 

erm_manu <- erm_manu %>%
  group_by(geo) %>%
  mutate(plannedjobreductionsmin = sum(plannedjobreductionsmin,na.rm = T),
         plannedjobcreation = sum(plannedjobcreation,na.rm = T)) %>%
  mutate(netjobreduction = plannedjobreductionsmin - plannedjobcreation ) %>%
  select(geo, netjobreduction) %>%
  ungroup() %>%
  distinct() 

colnames(erm_manu) <- paste0(colnames(erm_manu),"_manu")
colnames(erm_manu)[1] <- "geo"

# 1-2) Non-manufacturing jobs
# Create Nuts 2 code
erm_nonmanu <- erm %>%
  filter(sectorname != "Manufacturing") 

# Calculate net job reductions
erm_nonmanu <- erm_nonmanu %>%
  group_by(geo) %>%
  mutate(plannedjobreductionsmin = sum(plannedjobreductionsmin,na.rm = T),
         plannedjobcreation = sum(plannedjobcreation,na.rm = T)) %>%
  mutate(netjobreduction = plannedjobreductionsmin - plannedjobcreation ) %>%
  select(geo, netjobreduction) %>%
  ungroup() %>%
  distinct() 

colnames(erm_nonmanu) <- paste0(colnames(erm_nonmanu),"_non_manu")
colnames(erm_nonmanu)[1] <- "geo"

# Combine manufacturing and non-manufacturing data
erm_cleaned <- erm_manu %>% left_join(erm_nonmanu, by= "geo") 

# Save
write.csv(erm_cleaned, "erm_cleaned.csv", row.names = FALSE)
